// ---------------------------------------------------------------------------
// Session setup: empty Stata's memory, choose the platform, load path macros.
// ---------------------------------------------------------------------------
clear all

// Platform switch; only the "linux" case is handled below.
global system "linux"

if ("$system" == "linux") {
	// path separator
	global s "/"
	// directory that holds the project's do-files
	global code "/"
}

// Path definitions. Presumably this is where ${cached} and ${temp} (used later
// in this script) get defined -- confirm in _set-path.do.
run "${code}${s}_set-path.do"

// Load the combined GSE loan-level file (created in 1c-combine-loanlevel-GSEdata.do).
use "${cached}/embs-GSE-loanlevel-combined.dta", clear

// sample selection
// Restrict to the six embs_product codes listed below.
keep if inlist(embs_product, "FHLG30", "FHLG30HILTV", "FHLG30JM", "FNM30", "FNM30HILTV",  "FNM30JM") 
// keep only state_ids including high-cost counties
// (the list is split over several inlist() calls; string inlist() accepts at most 10 arguments)
keep if inlist(state_id, "CA","NY","NJ", "MA", "WA") | inlist(state_id, "MD", "VA", "CT", "UT", "DC")  | inlist(state_id, "RI", "NH")
keep if orig_term == 360
keep if num_units==1
// Origination month January 2009 or later. Stata treats numeric missing as
// larger than any number, so a bare ">=" would also keep loans whose
// origination month is unknown; exclude those explicitly.
keep if orig_ym >= ym(2009, 1) & !missing(orig_ym)
keep if proptype_id == "SF" // keep only single-family properties

// clean variables
// (presumably where Purchase and OwnerOccupied are constructed -- confirm in _clean-embs-vars.do)
do "${code}/_clean-embs-vars.do"

// Drop loans lacking either input to the home-price calculation below.
// missing() also catches extended missing values (.a-.z), which a comparison
// against "." alone would let through.
drop if missing(orig_loan_amount, orig_loan_to_value)

// Keep only observations flagged Purchase == 1 and OwnerOccupied == 1.
keep if Purchase == 1
keep if OwnerOccupied == 1

// Implied home price: loan amount divided by loan-to-value. The "* 100"
// implies orig_loan_to_value is stored in percent -- confirm against the data.
// The result is missing when orig_loan_to_value is 0 (division by zero yields
// missing in Stata).
gen homeprice = orig_loan_amount / orig_loan_to_value * 100

// Indicator: 1 when tbaeligcode_id is "Y", 0 for every other value (including blank).
gen byte tbaelig = tbaeligcode_id == "Y"

// Anonymized integer ids for sellers and loans. group() numbers the distinct
// values 1, 2, ... in sort order of the source variable (the ids are not
// random); observations with a missing source value get a missing id.
// Stored as long: the default float type represents integers exactly only up
// to 16,777,216, beyond which distinct groups could end up sharing an id.
egen long seller_id_id = group(seller_id)
egen long embs_id = group(loanseqnum_id)

// Variables carried into the prepared file. "*Orig" is a wildcard matching
// every variable whose name ends in "Orig". keep retains the dataset's own
// variable order regardless of the order listed here.
local keepvars embs_id seller_id_id agency_id state_id orig_ym ///
	orig_rate orig_loan_amount orig_loan_to_value homeprice ///
	credit_score debt_to_income mtoi tbaelig OwnerOccupied *Orig
keep `keepvars'

// Write the prepared sample, overwriting any existing file.
save "${temp}/embs-prep.dta", replace
